#set up chart theme function and load required packages
chart.theme.size = 14
source("../r_config/r_env_setup.R") #load packages and cutom ggplot theme function and presets
path_data = "../data/ISLR_data/"
library(MASS)
#install.packages("ISLR")
library(ISLR)
# create a numberical vector
x=c(2,7,5)
x
## [1] 2 7 5
# create a vector from sequence from, number of elments and increment by
y=seq(from=4, length=3,by=3)
y
## [1] 4 7 10
# create a vector from sequence from to and increment by
y=seq(from=4, to=13, by=3)
y
## [1] 4 7 10 13
x+y
## [1] 6 14 15 15
x/y
## [1] 0.5000000 1.0000000 0.5000000 0.1538462
x^y
## [1] 16 823543 9765625 8192
x[2] # print second element of vector x - starts from 1
## [1] 7
x[2:3]
## [1] 7 5
x[-2] # remove the element 2 from the vector and return the rest
## [1] 2 5
x[-c(1,2)] # remove elements 1 and 2 from the vector
## [1] 5
# 12 element matrix with 4 rows and 3 col
z=matrix(seq(1,12),4,3)
z
## [,1] [,2] [,3]
## [1,] 1 5 9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
# rows 3 to 4 and col 2 to 3
z[3:4,2:3]
## [,1] [,2]
## [1,] 7 11
## [2,] 8 12
z[,2:3] # all rows and columns 2, 3
## [,1] [,2]
## [1,] 5 9
## [2,] 6 10
## [3,] 7 11
## [4,] 8 12
z[,1] # all rows and column 1
## [1] 1 2 3 4
z[,1,drop=FALSE] # retain the matrix
## [,1]
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
dim(z) # no. of rows and col
## [1] 4 3
ls() # list of data objects
## [1] "chart_footer" "chart_footnote"
## [3] "chart_footnote_save" "chart_format_bar_x_axis_rotate"
## [5] "chart_format_bar1" "chart_format_bar2"
## [7] "chart_format_default" "chart_format_hist_no_minor"
## [9] "chart_format_hist_no_vgrid" "chart_format_hist1"
## [11] "chart_theme_bar" "chart_theme_default"
## [13] "chart_theme_histogram" "chart_theme_minimal"
## [15] "chart_theme_scatter" "chart_theme01"
## [17] "chart.line.size" "chart.theme.size"
## [19] "color_highlight_pal" "color_primary_pal"
## [21] "geom_bar_custom" "legend_size_override"
## [23] "legend_title_hide" "legend_top"
## [25] "package" "packages"
## [27] "path_data" "text.size"
## [29] "x" "y"
## [31] "z"
rm(y) # remove vector y
ls()
## [1] "chart_footer" "chart_footnote"
## [3] "chart_footnote_save" "chart_format_bar_x_axis_rotate"
## [5] "chart_format_bar1" "chart_format_bar2"
## [7] "chart_format_default" "chart_format_hist_no_minor"
## [9] "chart_format_hist_no_vgrid" "chart_format_hist1"
## [11] "chart_theme_bar" "chart_theme_default"
## [13] "chart_theme_histogram" "chart_theme_minimal"
## [15] "chart_theme_scatter" "chart_theme01"
## [17] "chart.line.size" "chart.theme.size"
## [19] "color_highlight_pal" "color_primary_pal"
## [21] "geom_bar_custom" "legend_size_override"
## [23] "legend_title_hide" "legend_top"
## [25] "package" "packages"
## [27] "path_data" "text.size"
## [29] "x" "z"
x=runif(50)
x
## [1] 0.59865236 0.07734090 0.50563056 0.24184711 0.29518013 0.99841695
## [7] 0.36579037 0.34105074 0.31082351 0.42464203 0.57105918 0.86676923
## [13] 0.11483153 0.10723313 0.98432896 0.03290910 0.84335147 0.24569949
## [19] 0.24117104 0.56326188 0.38073335 0.57331388 0.77516890 0.06589140
## [25] 0.64475563 0.66179147 0.54256958 0.73781172 0.72120992 0.24783744
## [31] 0.55754446 0.65712876 0.91594592 0.61441602 0.89631957 0.33404201
## [37] 0.95006025 0.30279009 0.78508569 0.01851865 0.48507190 0.29634177
## [43] 0.47050787 0.15363834 0.21350483 0.79741760 0.08071203 0.18318078
## [49] 0.93075617 0.92878729
xd = data.frame(x)
qplot(data=xd, x, geom="histogram", binwidth=0.1) + chart_theme_default
y=rnorm(50)
yd = data.frame(x)
qplot(data=yd, y, geom="histogram", binwidth=0.1)
# combine x and y vectors to a dataframe
data = data.frame(x, y)
#scatterplot x and y
ggplot(data, aes(x=x, y=y)) + chart_theme_default +
geom_point(size = 6, color= color_primary_pal[2], alpha = .5) +
xlab("Random Uniform") +
ylab("Random Normal")
plot(x,y,xlab="Random Uniform",ylab="Random Normal",pch="*",col="blue")
#panel with two charts stacked
#par(mfrow=c(2,1))
plot(x,y)
hist(y)
#panel with two charts side by side
#par(mfrow=c(1,2))
hist(y)
hist(x)
### Reading in data
Auto = read_csv(file = paste(path_data, "Auto.csv", sep =""))
problems(Auto)
## Source: local data frame [139 x 4]
##
## row col expected actual
## 1 188 1 no trailing characters .5
## 2 190 1 no trailing characters .5
## 3 191 1 no trailing characters .5
## 4 195 1 no trailing characters .5
## 5 197 1 no trailing characters .5
## 6 202 1 no trailing characters .5
## 7 203 1 no trailing characters .5
## 8 204 1 no trailing characters .5
## 9 207 1 no trailing characters .5
## 10 212 1 no trailing characters .5
## .. ... ... ... ...
names(Auto)
## [1] "mpg" "cylinders" "displacement" "horsepower"
## [5] "weight" "acceleration" "year" "origin"
## [9] "name"
dim(Auto)
## [1] 397 9
class(Auto)
## [1] "tbl_df" "tbl" "data.frame"
summary(Auto)
## mpg cylinders displacement horsepower
## Min. : 9.00 Min. :3.000 Min. : 68.0 Length:397
## 1st Qu.:17.00 1st Qu.:4.000 1st Qu.:104.0 Class :character
## Median :23.00 Median :4.000 Median :146.0 Mode :character
## Mean :23.35 Mean :5.458 Mean :193.5
## 3rd Qu.:29.00 3rd Qu.:8.000 3rd Qu.:262.0
## Max. :46.00 Max. :8.000 Max. :455.0
## weight acceleration year origin
## Min. :1613 Min. : 8.00 Min. :70.00 Min. :1.000
## 1st Qu.:2223 1st Qu.:13.80 1st Qu.:73.00 1st Qu.:1.000
## Median :2800 Median :15.50 Median :76.00 Median :1.000
## Mean :2970 Mean :15.56 Mean :75.99 Mean :1.574
## 3rd Qu.:3609 3rd Qu.:17.10 3rd Qu.:79.00 3rd Qu.:2.000
## Max. :5140 Max. :24.80 Max. :82.00 Max. :3.000
## name
## Length:397
## Class :character
## Mode :character
##
##
##
plot(Auto$cylinders,Auto$mpg)
plot(Auto$cyl,Auto$mpg)
attach(Auto)
search()
## [1] ".GlobalEnv" "Auto" "package:ISLR"
## [4] "package:MASS" "package:caret" "package:lattice"
## [7] "package:GGally" "package:energy" "package:xtable"
## [10] "package:htmlTable" "package:knitr" "package:RColorBrewer"
## [13] "package:tidyr" "package:data.table" "package:readr"
## [16] "package:dplyr" "package:gridExtra" "package:grid"
## [19] "package:stringr" "package:lubridate" "package:scales"
## [22] "package:ggthemes" "package:ggplot2" "package:stats"
## [25] "package:graphics" "package:grDevices" "package:utils"
## [28] "package:datasets" "package:methods" "Autoloads"
## [31] "package:base"
plot(cylinders,mpg)
cylinders=as.factor(cylinders)
plot(cylinders,mpg,xlab="Cylinders",ylab="Mpg",col="red")
pairs(mpg~cylinders+acceleration+weight,Auto)
#Linear regression
names(Boston)
## [1] "crim" "zn" "indus" "chas" "nox" "rm" "age"
## [8] "dis" "rad" "tax" "ptratio" "black" "lstat" "medv"
head(Boston)
## crim zn indus chas nox rm age dis rad tax ptratio black
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12
## lstat medv
## 1 4.98 24.0
## 2 9.14 21.6
## 3 4.03 34.7
## 4 2.94 33.4
## 5 5.33 36.2
## 6 5.21 28.7
#?Boston
plot(medv~lstat,Boston)
fit1=lm(medv~lstat,data=Boston)
fit1
##
## Call:
## lm(formula = medv ~ lstat, data = Boston)
##
## Coefficients:
## (Intercept) lstat
## 34.55 -0.95
summary(fit1)
##
## Call:
## lm(formula = medv ~ lstat, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.168 -3.990 -1.318 2.034 24.500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.55384 0.56263 61.41 <2e-16 ***
## lstat -0.95005 0.03873 -24.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.216 on 504 degrees of freedom
## Multiple R-squared: 0.5441, Adjusted R-squared: 0.5432
## F-statistic: 601.6 on 1 and 504 DF, p-value: < 2.2e-16
abline(fit1,col="red")
names(fit1)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
fit1$coefficients
## (Intercept) lstat
## 34.5538409 -0.9500494
coefficients(fit1)
## (Intercept) lstat
## 34.5538409 -0.9500494
confint(fit1)
## 2.5 % 97.5 %
## (Intercept) 33.448457 35.6592247
## lstat -1.026148 -0.8739505
#predict outcomes (median value) for 3 data points
predict(fit1,data.frame(lstat=c(5,10,15)),interval="confidence")
## fit lwr upr
## 1 29.80359 29.00741 30.59978
## 2 25.05335 24.47413 25.63256
## 3 20.30310 19.73159 20.87461
#Using caret
fit1_caret <- train(medv~lstat, data = Boston, method = "lm")
fit1_caret
## Linear Regression
##
## 506 samples
## 13 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
##
## Summary of sample sizes: 506, 506, 506, 506, 506, 506, ...
##
## Resampling results
##
## RMSE Rsquared RMSE SD Rsquared SD
## 6.293631 0.5397662 0.382599 0.03151111
##
##
names(fit1_caret)
## [1] "method" "modelInfo" "modelType" "results"
## [5] "pred" "bestTune" "call" "dots"
## [9] "metric" "control" "finalModel" "preProcess"
## [13] "trainingData" "resample" "resampledCM" "perfNames"
## [17] "maximize" "yLimits" "times" "terms"
## [21] "coefnames" "xlevels"
names(fit1_caret$finalModel)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
## [13] "xNames" "problemType" "tuneValue" "obsLevels"
summary(fit1_caret)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.168 -3.990 -1.318 2.034 24.500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.55384 0.56263 61.41 <2e-16 ***
## lstat -0.95005 0.03873 -24.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.216 on 504 degrees of freedom
## Multiple R-squared: 0.5441, Adjusted R-squared: 0.5432
## F-statistic: 601.6 on 1 and 504 DF, p-value: < 2.2e-16
fit1_caret$finalModel$coefficients
## (Intercept) lstat
## 34.5538409 -0.9500494
#score the model on the training data
pred1_caret = predict(fit1_caret$finalModel, newdata = Boston)
#predict outcomes (median value) for 3 data points along with confidence intervals
predict(fit1_caret$finalModel,data.frame(lstat=c(5,10,15)),interval="confidence", level = .95)
## fit lwr upr
## 1 29.80359 29.00741 30.59978
## 2 25.05335 24.47413 25.63256
## 3 20.30310 19.73159 20.87461
summary(pred1_caret)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.52 18.45 23.76 22.53 27.95 32.91
#coefficients for the model
coefficients(fit1_caret$finalModel)
## (Intercept) lstat
## 34.5538409 -0.9500494
#confidence interval for the model
confint(fit1_caret$finalModel, level = .95)
## 2.5 % 97.5 %
## (Intercept) 33.448457 35.6592247
## lstat -1.026148 -0.8739505
#fitted values from the model object
summary(fit1_caret$finalModel$fitted.values)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.52 18.45 23.76 22.53 27.95 32.91
summary(Boston$medv)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 17.02 21.20 22.53 25.00 50.00
#plot(fit1_caret$finalModel)
#plot residuals and fitted values
ggplot(data = Boston, aes(x = lstat, y = medv)) + chart_theme_default +
geom_point(color = color_primary_pal[1], size = 2, alpha = 0.7) +
geom_point(aes(x = lstat, y = fit1_caret$finalModel$residuals, color = "Residuals"),
size = 2, alpha = 0.7) +
geom_hline(yintercept = 0, color = color_primary_pal[1]) +
geom_line(aes(x = lstat, y = fit1_caret$finalModel$fitted.values)) +
legend_top + legend_size_override(5) + legend_title_hide
#plot actual vs fitted values
ggplot(data = Boston, aes(y = medv, x = pred1_caret)) + chart_theme_default +
geom_point(color = color_primary_pal[1], size = 2, alpha = 0.7)
#Library for high quality model visualization
#http://www.strengejacke.de/sjPlot/sjp.lm/
#https://susanejohnston.wordpress.com/2012/08/09/a-quick-and-easy-function-to-plot-lm-results-in-r/
library(sjPlot)
# load sample data
data(Auto)
# set plot theme
sjp.setTheme(theme = "539")
# plot frequencies
sjp.frq(Auto$year)
sjp.lm(fit1, type = "pred")
par(mfrow=c(2,2))
#plot(fit1)
plot(fit1_caret$finalModel)
#from the curve in the residuals we can see that the model is not quite capturing the non-linearity
#Linear model median value as a function of lstat and age
fit2=lm(medv~lstat+age,data=Boston)
fit2=train(medv~lstat+age, method = "lm", data=Boston)
summary(fit2)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.981 -3.978 -1.283 1.968 23.158
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.22276 0.73085 45.458 < 2e-16 ***
## lstat -1.03207 0.04819 -21.416 < 2e-16 ***
## age 0.03454 0.01223 2.826 0.00491 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.173 on 503 degrees of freedom
## Multiple R-squared: 0.5513, Adjusted R-squared: 0.5495
## F-statistic: 309 on 2 and 503 DF, p-value: < 2.2e-16
fit3=train(medv~., method = "lm", data=Boston)
summary(fit3)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.595 -2.730 -0.518 1.777 26.199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.646e+01 5.103e+00 7.144 3.28e-12 ***
## crim -1.080e-01 3.286e-02 -3.287 0.001087 **
## zn 4.642e-02 1.373e-02 3.382 0.000778 ***
## indus 2.056e-02 6.150e-02 0.334 0.738288
## chas 2.687e+00 8.616e-01 3.118 0.001925 **
## nox -1.777e+01 3.820e+00 -4.651 4.25e-06 ***
## rm 3.810e+00 4.179e-01 9.116 < 2e-16 ***
## age 6.922e-04 1.321e-02 0.052 0.958229
## dis -1.476e+00 1.995e-01 -7.398 6.01e-13 ***
## rad 3.060e-01 6.635e-02 4.613 5.07e-06 ***
## tax -1.233e-02 3.760e-03 -3.280 0.001112 **
## ptratio -9.527e-01 1.308e-01 -7.283 1.31e-12 ***
## black 9.312e-03 2.686e-03 3.467 0.000573 ***
## lstat -5.248e-01 5.072e-02 -10.347 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.745 on 492 degrees of freedom
## Multiple R-squared: 0.7406, Adjusted R-squared: 0.7338
## F-statistic: 108.1 on 13 and 492 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(fit3$finalModel)
fit4=train(medv~., method = "lm", data=dplyr::select(Boston, -c(age, indus)))
summary(fit4)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.5984 -2.7386 -0.5046 1.7273 26.2373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.341145 5.067492 7.171 2.73e-12 ***
## crim -0.108413 0.032779 -3.307 0.001010 **
## zn 0.045845 0.013523 3.390 0.000754 ***
## chas 2.718716 0.854240 3.183 0.001551 **
## nox -17.376023 3.535243 -4.915 1.21e-06 ***
## rm 3.801579 0.406316 9.356 < 2e-16 ***
## dis -1.492711 0.185731 -8.037 6.84e-15 ***
## rad 0.299608 0.063402 4.726 3.00e-06 ***
## tax -0.011778 0.003372 -3.493 0.000521 ***
## ptratio -0.946525 0.129066 -7.334 9.24e-13 ***
## black 0.009291 0.002674 3.475 0.000557 ***
## lstat -0.522553 0.047424 -11.019 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.736 on 494 degrees of freedom
## Multiple R-squared: 0.7406, Adjusted R-squared: 0.7348
## F-statistic: 128.2 on 11 and 494 DF, p-value: < 2.2e-16
#interaction between lstat and age
fit5=lm(medv~lstat*age,Boston)
summary(fit5)
##
## Call:
## lm(formula = medv ~ lstat * age, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.806 -4.045 -1.333 2.085 27.552
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.0885359 1.4698355 24.553 < 2e-16 ***
## lstat -1.3921168 0.1674555 -8.313 8.78e-16 ***
## age -0.0007209 0.0198792 -0.036 0.9711
## lstat:age 0.0041560 0.0018518 2.244 0.0252 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.149 on 502 degrees of freedom
## Multiple R-squared: 0.5557, Adjusted R-squared: 0.5531
## F-statistic: 209.3 on 3 and 502 DF, p-value: < 2.2e-16
fit6=lm(medv~lstat +I(lstat^2),Boston); summary(fit6)
##
## Call:
## lm(formula = medv ~ lstat + I(lstat^2), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2834 -3.8313 -0.5295 2.3095 25.4148
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.862007 0.872084 49.15 <2e-16 ***
## lstat -2.332821 0.123803 -18.84 <2e-16 ***
## I(lstat^2) 0.043547 0.003745 11.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.524 on 503 degrees of freedom
## Multiple R-squared: 0.6407, Adjusted R-squared: 0.6393
## F-statistic: 448.5 on 2 and 503 DF, p-value: < 2.2e-16
attach(Boston)
par(mfrow=c(1,1))
plot(medv~lstat)
points(lstat,fitted(fit6),col="red",pch=20)
fit7=lm(medv~poly(lstat,4))
points(lstat,fitted(fit7),col="blue",pch=20)
plot(1:20,1:20,pch=1:20,cex=2)
#Qualitative predictors
names(Carseats)
## [1] "Sales" "CompPrice" "Income" "Advertising" "Population"
## [6] "Price" "ShelveLoc" "Age" "Education" "Urban"
## [11] "US"
summary(Carseats)
## Sales CompPrice Income Advertising
## Min. : 0.000 Min. : 77 Min. : 21.00 Min. : 0.000
## 1st Qu.: 5.390 1st Qu.:115 1st Qu.: 42.75 1st Qu.: 0.000
## Median : 7.490 Median :125 Median : 69.00 Median : 5.000
## Mean : 7.496 Mean :125 Mean : 68.66 Mean : 6.635
## 3rd Qu.: 9.320 3rd Qu.:135 3rd Qu.: 91.00 3rd Qu.:12.000
## Max. :16.270 Max. :175 Max. :120.00 Max. :29.000
## Population Price ShelveLoc Age
## Min. : 10.0 Min. : 24.0 Bad : 96 Min. :25.00
## 1st Qu.:139.0 1st Qu.:100.0 Good : 85 1st Qu.:39.75
## Median :272.0 Median :117.0 Medium:219 Median :54.50
## Mean :264.8 Mean :115.8 Mean :53.32
## 3rd Qu.:398.5 3rd Qu.:131.0 3rd Qu.:66.00
## Max. :509.0 Max. :191.0 Max. :80.00
## Education Urban US
## Min. :10.0 No :118 No :142
## 1st Qu.:12.0 Yes:282 Yes:258
## Median :14.0
## Mean :13.9
## 3rd Qu.:16.0
## Max. :18.0
fit1=lm(Sales~.+Income*Advertising+Age*Price,Carseats)
summary(fit1)
##
## Call:
## lm(formula = Sales ~ . + Income * Advertising + Age * Price,
## data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9208 -0.7503 0.0177 0.6754 3.3413
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5755654 1.0087470 6.519 2.22e-10 ***
## CompPrice 0.0929371 0.0041183 22.567 < 2e-16 ***
## Income 0.0108940 0.0026044 4.183 3.57e-05 ***
## Advertising 0.0702462 0.0226091 3.107 0.002030 **
## Population 0.0001592 0.0003679 0.433 0.665330
## Price -0.1008064 0.0074399 -13.549 < 2e-16 ***
## ShelveLocGood 4.8486762 0.1528378 31.724 < 2e-16 ***
## ShelveLocMedium 1.9532620 0.1257682 15.531 < 2e-16 ***
## Age -0.0579466 0.0159506 -3.633 0.000318 ***
## Education -0.0208525 0.0196131 -1.063 0.288361
## UrbanYes 0.1401597 0.1124019 1.247 0.213171
## USYes -0.1575571 0.1489234 -1.058 0.290729
## Income:Advertising 0.0007510 0.0002784 2.698 0.007290 **
## Price:Age 0.0001068 0.0001333 0.801 0.423812
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.011 on 386 degrees of freedom
## Multiple R-squared: 0.8761, Adjusted R-squared: 0.8719
## F-statistic: 210 on 13 and 386 DF, p-value: < 2.2e-16
contrasts(Carseats$ShelveLoc)
## Good Medium
## Bad 0 0
## Good 1 0
## Medium 0 1
##Writing R functions
regplot=function(x,y){
fit=lm(y~x)
plot(x,y)
abline(fit,col="red")
}
attach(Carseats)
regplot(Price,Sales)
#the ... arguments get passed as it is in the function
regplot=function(x,y,...){
fit=lm(y~x)
plot(x,y,...)
abline(fit,col="red")
}
regplot(Price,Sales,xlab="Price",ylab="Sales",col="blue",pch=20)